library(tidyverse)
library(lubridate)
library(DT)
library(viridis)
library(janitor)
library(plotly)
library(respirometry)lab 13
NEON MAG DATA REPORT
soilMAG_data <- read.csv("NEON_soilMAGs_soilChem.csv")
head(soilMAG_data) X genomicsSampleID decimalLatitude decimalLongitude elevation
1 1 BONA_001-O-20230710-COMP 65.17445 -147.4782 374.1
2 2 BONA_001-O-20230710-COMP 65.17445 -147.4782 374.1
3 3 BONA_001-O-20230710-COMP 65.17445 -147.4782 374.1
4 4 BONA_001-O-20230710-COMP 65.17445 -147.4782 374.1
5 5 BONA_001-O-20230710-COMP 65.17445 -147.4782 374.1
6 6 BONA_001-O-20230710-COMP 65.17445 -147.4782 374.1
soilTemp soilMoisture soilInWaterpH siteID bin_oid bin_id
1 8.1 1.546 3.993976 BONA 3300078357_s0 3300078357_s0
2 8.1 1.546 3.993976 BONA 3300078357_s10 3300078357_s10
3 8.1 1.546 3.993976 BONA 3300078357_s12 3300078357_s12
4 8.1 1.546 3.993976 BONA 3300078357_s13 3300078357_s13
5 8.1 1.546 3.993976 BONA 3300078357_s14 3300078357_s14
6 8.1 1.546 3.993976 BONA 3300078357_s19 3300078357_s19
site
1 Caribou-Poker Creeks Research Watershed NEON Field Site, Alaska, USA
2 Caribou-Poker Creeks Research Watershed NEON Field Site, Alaska, USA
3 Caribou-Poker Creeks Research Watershed NEON Field Site, Alaska, USA
4 Caribou-Poker Creeks Research Watershed NEON Field Site, Alaska, USA
5 Caribou-Poker Creeks Research Watershed NEON Field Site, Alaska, USA
6 Caribou-Poker Creeks Research Watershed NEON Field Site, Alaska, USA
sample_name subplot layer quadrant date img_genome_id bin_quality
1 BONA_070-O-20230712 70 O NA 20230712 3300078357 MQ
2 BONA_070-O-20230712 70 O NA 20230712 3300078357 MQ
3 BONA_070-O-20230712 70 O NA 20230712 3300078357 MQ
4 BONA_070-O-20230712 70 O NA 20230712 3300078357 MQ
5 BONA_070-O-20230712 70 O NA 20230712 3300078357 MQ
6 BONA_070-O-20230712 70 O NA 20230712 3300078357 MQ
bin_lineage
1 Bacteria; Acidobacteriota
2 Bacteria; Acidobacteriota; Terriglobia; Terriglobales; Acidobacteriaceae
3 Bacteria
4 Bacteria; Pseudomonadota; Alphaproteobacteria; Rhodospirillales; Acetobacteraceae; Acidisoma; Acidisoma sp. L85
5 Bacteria; Acidobacteriota; Terriglobia
6 Bacteria; Pseudomonadota; Gammaproteobacteria; Lysobacterales; Rhodanobacteraceae; Luteibacter; Luteibacter sp. OK325
gtdb_taxonomy_lineage
1 Bacteria; Acidobacteriota; Terriglobia; Acidoferrales; UBA7541; Acidoferrum
2 Bacteria; Acidobacteriota; Terriglobia; Terriglobales; Acidobacteriaceae; PALSA-350
3 Bacteria; Myxococcota; Myxococcia; Myxococcales; Myxococcaceae; JADGRA01
4 Bacteria; Pseudomonadota; Alphaproteobacteria; Acetobacterales; Acetobacteraceae; Acidisoma; Acidisoma sp009765865
5 Bacteria; Acidobacteriota; Terriglobia; Acidoferrales; UBA7541; Palsa-295
6 Bacteria; Pseudomonadota; Gammaproteobacteria; Xanthomonadales; Rhodanobacteraceae; Luteibacter
domain phylum class order
1 Bacteria Acidobacteriota Terriglobia Acidoferrales
2 Bacteria Acidobacteriota Terriglobia Terriglobales
3 Bacteria Myxococcota Myxococcia Myxococcales
4 Bacteria Pseudomonadota Alphaproteobacteria Acetobacterales
5 Bacteria Acidobacteriota Terriglobia Acidoferrales
6 Bacteria Pseudomonadota Gammaproteobacteria Xanthomonadales
family genus
1 UBA7541 Acidoferrum
2 Acidobacteriaceae PALSA-350
3 Myxococcaceae JADGRA01
4 Acetobacteraceae Acidisoma
5 UBA7541 Palsa-295
6 Rhodanobacteraceae Luteibacter
bin_methods
1 SemiBin2:v2.1.0, CheckM2:v1.0.2, GTDB-Tk:v2.4.0, GTDB-Tk-database:release220
2 SemiBin2:v2.1.0, CheckM2:v1.0.2, GTDB-Tk:v2.4.0, GTDB-Tk-database:release220
3 SemiBin2:v2.1.0, CheckM2:v1.0.2, GTDB-Tk:v2.4.0, GTDB-Tk-database:release220
4 SemiBin2:v2.1.0, CheckM2:v1.0.2, GTDB-Tk:v2.4.0, GTDB-Tk-database:release220
5 SemiBin2:v2.1.0, CheckM2:v1.0.2, GTDB-Tk:v2.4.0, GTDB-Tk-database:release220
6 SemiBin2:v2.1.0, CheckM2:v1.0.2, GTDB-Tk:v2.4.0, GTDB-Tk-database:release220
created_by date_added bin_completeness bin_contamination average_coverage
1 IMG_PIPELINE 2025-01-24 95.64 4.79 NA
2 IMG_PIPELINE 2025-01-24 62.45 2.88 NA
3 IMG_PIPELINE 2025-01-24 90.00 0.04 NA
4 IMG_PIPELINE 2025-01-24 87.72 1.90 NA
5 IMG_PIPELINE 2025-01-24 79.33 1.17 NA
6 IMG_PIPELINE 2025-01-24 80.17 2.58 NA
total_number_of_bases x5s_r_rna x16s_r_rna x23s_r_rna t_rna_genes gene_count
1 4541925 0 2 1 43 4073
2 4850766 0 0 1 31 4301
3 4108080 1 0 0 49 4046
4 3014437 1 0 1 27 3091
5 3187874 0 0 0 24 3086
6 3917029 0 0 0 34 3943
scaffold_count gold_study_id community type sample_unknown
1 163 Gs0166454 Soil microbial communities Soil NA
2 269 Gs0166454 Soil microbial communities Soil NA
3 394 Gs0166454 Soil microbial communities Soil NA
4 281 Gs0166454 Soil microbial communities Soil NA
5 247 Gs0166454 Soil microbial communities Soil NA
6 466 Gs0166454 Soil microbial communities Soil NA
Environmental Correlations
show the relationship between environmental conditions and samples
Elevation
df <- read.csv("NEON_soilMAGs_soilChem.csv")
subset_idelev <- df[, c("img_genome_id", "elevation")]
head(subset_idelev) img_genome_id elevation
1 3300078357 374.1
2 3300078357 374.1
3 3300078357 374.1
4 3300078357 374.1
5 3300078357 374.1
6 3300078357 374.1
write.csv(subset_idelev, "genome_elevation.csv", row.names = FALSE)library(ggplot2)
ggplot(subset_idelev, aes(x = img_genome_id, y = elevation)) +
geom_bar(stat = "identity", fill = "darkgreen") +
labs(x = "Genome ID", y = "Elevation") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))ggplot(subset_idelev, aes(x = img_genome_id, y = elevation)) +
geom_point(color = "darkgreen", size = 3) +
labs(x = "Genome ID", y = "Elevation") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))Soil Moisture
df <- read.csv("NEON_soilMAGs_soilChem.csv")
subset_idmois <- df[, c("img_genome_id", "soilMoisture")]
head(subset_idmois) img_genome_id soilMoisture
1 3300078357 1.546
2 3300078357 1.546
3 3300078357 1.546
4 3300078357 1.546
5 3300078357 1.546
6 3300078357 1.546
write.csv(subset_idmois, "genome_soilMoisture.csv", row.names = FALSE)library(ggplot2)
ggplot(subset_idmois, aes(x = img_genome_id, y = soilMoisture)) +
geom_bar(stat = "identity", fill = "blue") +
labs(x = "Genome ID", y = "Soil Moisture") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))ggplot(subset_idmois, aes(x = img_genome_id, y = soilMoisture)) +
geom_point(color = "blue", size = 3) +
labs(x = "Genome ID", y = "Soil Moisture") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))Soil Temperature
df <- read.csv("NEON_soilMAGs_soilChem.csv")
subset_idtemp <- df[, c("img_genome_id", "soilTemp")]
head(subset_idtemp) img_genome_id soilTemp
1 3300078357 8.1
2 3300078357 8.1
3 3300078357 8.1
4 3300078357 8.1
5 3300078357 8.1
6 3300078357 8.1
write.csv(subset_idtemp, "genome_soilTemp.csv", row.names = FALSE)library(ggplot2)
ggplot(subset_idtemp, aes(x = img_genome_id, y = soilTemp)) +
geom_point(color = "darkred", size = 3) +
labs(x = "Genome ID", y = "Soil Temperature") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))Biogeography and spatial patterns
Mapping phylum distributions
shows the distribution of the phylum of the samples collected
df <- read.csv("NEON_soilMAGs_soilChem.csv")
location_table <- df[, c("phylum", "decimalLatitude", "decimalLongitude")]
head(location_table) phylum decimalLatitude decimalLongitude
1 Acidobacteriota 65.17445 -147.4782
2 Acidobacteriota 65.17445 -147.4782
3 Myxococcota 65.17445 -147.4782
4 Pseudomonadota 65.17445 -147.4782
5 Acidobacteriota 65.17445 -147.4782
6 Pseudomonadota 65.17445 -147.4782
#install.packages(c("ggplot2", "maps", "mapdata"))
library(ggplot2)
library(maps)
library(mapdata)us_map <- map_data("state")df <- read.csv("NEON_soilMAGs_soilChem.csv")
mags_clean <- df %>%
filter(decimalLongitude >= -125, decimalLongitude <= -66,
decimalLatitude >= 25, decimalLatitude <= 50)
ggplot() +
geom_polygon(data = us_map,
aes(x = long, y = lat, group = group),
fill = "gray90", color = "black") +
geom_point(data = mags_clean,
aes(x = decimalLongitude, y = decimalLatitude, color = phylum),
size = 10, alpha = 0.7) +
coord_quickmap() +
theme_minimal() +
labs(title = "MAGs Found Across the United States",
x = "Longitude", y = "Latitude",
color = "Phylum") theme_minimal() +
theme(
axis.title.x = element_text(size = 150),
axis.title.y = element_text(size = 150),
axis.text.x = element_text(size = 100),
axis.text.y = element_text(size = 100)
)List of 136
$ line :List of 6
..$ colour : chr "black"
..$ linewidth : num 0.5
..$ linetype : num 1
..$ lineend : chr "butt"
..$ arrow : logi FALSE
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_line" "element"
$ rect :List of 5
..$ fill : chr "white"
..$ colour : chr "black"
..$ linewidth : num 0.5
..$ linetype : num 1
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_rect" "element"
$ text :List of 11
..$ family : chr ""
..$ face : chr "plain"
..$ colour : chr "black"
..$ size : num 11
..$ hjust : num 0.5
..$ vjust : num 0.5
..$ angle : num 0
..$ lineheight : num 0.9
..$ margin : 'margin' num [1:4] 0points 0points 0points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : logi FALSE
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ title : NULL
$ aspect.ratio : NULL
$ axis.title : NULL
$ axis.title.x :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : num 150
..$ hjust : NULL
..$ vjust : num 1
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 2.75points 0points 0points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.title.x.top :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : NULL
..$ vjust : num 0
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 0points 2.75points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.title.x.bottom : NULL
$ axis.title.y :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : num 150
..$ hjust : NULL
..$ vjust : num 1
..$ angle : num 90
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 2.75points 0points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.title.y.left : NULL
$ axis.title.y.right :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : NULL
..$ vjust : num 1
..$ angle : num -90
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 0points 0points 2.75points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.text :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : chr "grey30"
..$ size : 'rel' num 0.8
..$ hjust : NULL
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.text.x :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : num 100
..$ hjust : NULL
..$ vjust : num 1
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 2.2points 0points 0points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.text.x.top :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : NULL
..$ vjust : num 0
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 0points 2.2points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.text.x.bottom : NULL
$ axis.text.y :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : num 100
..$ hjust : num 1
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 2.2points 0points 0points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.text.y.left : NULL
$ axis.text.y.right :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 0
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 0points 0points 2.2points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.text.theta : NULL
$ axis.text.r :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 0.5
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : 'margin' num [1:4] 0points 2.2points 0points 2.2points
.. ..- attr(*, "unit")= int 8
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ axis.ticks : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ axis.ticks.x : NULL
$ axis.ticks.x.top : NULL
$ axis.ticks.x.bottom : NULL
$ axis.ticks.y : NULL
$ axis.ticks.y.left : NULL
$ axis.ticks.y.right : NULL
$ axis.ticks.theta : NULL
$ axis.ticks.r : NULL
$ axis.minor.ticks.x.top : NULL
$ axis.minor.ticks.x.bottom : NULL
$ axis.minor.ticks.y.left : NULL
$ axis.minor.ticks.y.right : NULL
$ axis.minor.ticks.theta : NULL
$ axis.minor.ticks.r : NULL
$ axis.ticks.length : 'simpleUnit' num 2.75points
..- attr(*, "unit")= int 8
$ axis.ticks.length.x : NULL
$ axis.ticks.length.x.top : NULL
$ axis.ticks.length.x.bottom : NULL
$ axis.ticks.length.y : NULL
$ axis.ticks.length.y.left : NULL
$ axis.ticks.length.y.right : NULL
$ axis.ticks.length.theta : NULL
$ axis.ticks.length.r : NULL
$ axis.minor.ticks.length : 'rel' num 0.75
$ axis.minor.ticks.length.x : NULL
$ axis.minor.ticks.length.x.top : NULL
$ axis.minor.ticks.length.x.bottom: NULL
$ axis.minor.ticks.length.y : NULL
$ axis.minor.ticks.length.y.left : NULL
$ axis.minor.ticks.length.y.right : NULL
$ axis.minor.ticks.length.theta : NULL
$ axis.minor.ticks.length.r : NULL
$ axis.line : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ axis.line.x : NULL
$ axis.line.x.top : NULL
$ axis.line.x.bottom : NULL
$ axis.line.y : NULL
$ axis.line.y.left : NULL
$ axis.line.y.right : NULL
$ axis.line.theta : NULL
$ axis.line.r : NULL
$ legend.background : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ legend.margin : 'margin' num [1:4] 5.5points 5.5points 5.5points 5.5points
..- attr(*, "unit")= int 8
$ legend.spacing : 'simpleUnit' num 11points
..- attr(*, "unit")= int 8
$ legend.spacing.x : NULL
$ legend.spacing.y : NULL
$ legend.key : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ legend.key.size : 'simpleUnit' num 1.2lines
..- attr(*, "unit")= int 3
$ legend.key.height : NULL
$ legend.key.width : NULL
$ legend.key.spacing : 'simpleUnit' num 5.5points
..- attr(*, "unit")= int 8
$ legend.key.spacing.x : NULL
$ legend.key.spacing.y : NULL
$ legend.frame : NULL
$ legend.ticks : NULL
$ legend.ticks.length : 'rel' num 0.2
$ legend.axis.line : NULL
$ legend.text :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : 'rel' num 0.8
..$ hjust : NULL
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ legend.text.position : NULL
$ legend.title :List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 0
..$ vjust : NULL
..$ angle : NULL
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi TRUE
..- attr(*, "class")= chr [1:2] "element_text" "element"
$ legend.title.position : NULL
$ legend.position : chr "right"
$ legend.position.inside : NULL
$ legend.direction : NULL
$ legend.byrow : NULL
$ legend.justification : chr "center"
$ legend.justification.top : NULL
$ legend.justification.bottom : NULL
$ legend.justification.left : NULL
$ legend.justification.right : NULL
$ legend.justification.inside : NULL
$ legend.location : NULL
$ legend.box : NULL
$ legend.box.just : NULL
$ legend.box.margin : 'margin' num [1:4] 0cm 0cm 0cm 0cm
..- attr(*, "unit")= int 1
$ legend.box.background : list()
..- attr(*, "class")= chr [1:2] "element_blank" "element"
$ legend.box.spacing : 'simpleUnit' num 11points
..- attr(*, "unit")= int 8
[list output truncated]
- attr(*, "class")= chr [1:2] "theme" "gg"
- attr(*, "complete")= logi TRUE
- attr(*, "validate")= logi TRUE
Spatial autocorrelation
shows the spatial correlation of genus richness (if sites with more genus richness are closer to other sites with genus richness) a positive slope indicates that high richness sites occur near each other.
library(dplyr)
df_unique <- df %>%
distinct(decimalLatitude, decimalLongitude, .keep_all = TRUE)library(dplyr)
library(sf)
library(spdep)
df_unique <- df %>%
distinct(decimalLatitude, decimalLongitude, .keep_all = TRUE)
df_sites <- df %>%
group_by(decimalLatitude, decimalLongitude) %>%
summarise(
genus_richness = n_distinct(genus),
family_richness = n_distinct(family),
order_richness = n_distinct(order),
.groups = "drop"
)
df_sf <- st_as_sf(df_sites, coords = c("decimalLongitude", "decimalLatitude"), crs = 4326)
df_sf <- st_transform(df_sf, 3857)
coords <- st_coordinates(df_sf)
knn_nb <- knn2nb(knearneigh(coords, k = 5))
lw <- nb2listw(knn_nb, style = "W")
moran.test(df_sf$genus_richness, lw)
Moran I test under randomisation
data: df_sf$genus_richness
weights: lw
Moran I statistic standard deviate = 28.4, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic Expectation Variance
1.000000000 -0.003731343 0.001249102
moran.test(df_sf$family_richness, lw)
Moran I test under randomisation
data: df_sf$family_richness
weights: lw
Moran I statistic standard deviate = 28.358, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic Expectation Variance
1.000000000 -0.003731343 0.001252803
moran.test(df_sf$order_richness, lw)
Moran I test under randomisation
data: df_sf$order_richness
weights: lw
Moran I statistic standard deviate = 28.346, p-value < 2.2e-16
alternative hypothesis: greater
sample estimates:
Moran I statistic Expectation Variance
1.000000000 -0.003731343 0.001253881
library(spdep)
moran.plot(df_sf$genus_richness, lw,
main = "Moran Scatterplot: Genus Richness",
xlab = "Genus Richness",
ylab = "Spatial Lag of Genus Richness")Community composition and diversity
Taxonomic profiling
Shows us the phylum composition of the samples of each site
library(dplyr)
df_rel_abund <- df %>%
group_by(siteID, phylum) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(siteID) %>%
mutate(rel_abund = count / sum(count))library(ggplot2)
ggplot(df_rel_abund, aes(x = siteID, y = rel_abund, fill = phylum)) +
geom_bar(stat = "identity") +
labs(
title = "Taxonomic Profile by Site",
x = "Site ID",
y = "Relative Abundance",
fill = "Phylum"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "right"
)Co-occurrence and network analysis
Taxonomic co-occurrence heat map
Shows which genera are more likely to occur close to each other
library(dplyr)
library(tidyr)
library(ggplot2)
library(pheatmap)
df <- read.csv("NEON_soilMAGs_soilChem.csv")
# Step 1: Create a presence/absence matrix of taxa by sample
taxa_matrix <- df %>%
select(genomicsSampleID, genus) %>%
mutate(present = 1) %>%
distinct() %>%
pivot_wider(names_from = genus, values_from = present, values_fill = 0)
# Step 2: Compute co-occurrence (pairwise correlations between genera)
# Remove sample ID column
taxa_only <- taxa_matrix %>% select(-genomicsSampleID)
# Remove genera with no variation across samples
taxa_filtered <- taxa_only[, apply(taxa_only, 2, sd) != 0]
# Recompute correlation
co_occurrence <- cor(taxa_filtered, method = "spearman")
# Replace NA/NaN/Inf with 0 or remove problematic rows/columns
co_occurrence[!is.finite(co_occurrence)] <- 0
# Step 3: Plot heatmap
pheatmap(co_occurrence,
clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean",
clustering_method = "ward.D2",
main = "Genus Co-occurrence Heatmap",
color = colorRampPalette(c("blue", "white", "red"))(50))